{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to convert all the string values to upper, lower cases in a given pandas series. Also find the length of the string values."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>score</th>\n",
       "      <th>attempts</th>\n",
       "      <th>qualify</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>Anastasia</td>\n",
       "      <td>12.5</td>\n",
       "      <td>1</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>Dima</td>\n",
       "      <td>9.0</td>\n",
       "      <td>3</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>Katherine</td>\n",
       "      <td>16.5</td>\n",
       "      <td>2</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>James</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>Emily</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>f</th>\n",
       "      <td>Michael</td>\n",
       "      <td>20.0</td>\n",
       "      <td>3</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>g</th>\n",
       "      <td>Matthew</td>\n",
       "      <td>14.5</td>\n",
       "      <td>1</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>h</th>\n",
       "      <td>Laura</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>i</th>\n",
       "      <td>Kevin</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>j</th>\n",
       "      <td>Jonas</td>\n",
       "      <td>19.0</td>\n",
       "      <td>1</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        name  score  attempts qualify\n",
       "a  Anastasia   12.5         1     yes\n",
       "b       Dima    9.0         3      no\n",
       "c  Katherine   16.5         2     yes\n",
       "d      James    NaN         3      no\n",
       "e      Emily    9.0         2      no\n",
       "f    Michael   20.0         3     yes\n",
       "g    Matthew   14.5         1     yes\n",
       "h      Laura    NaN         1      no\n",
       "i      Kevin    8.0         2      no\n",
       "j      Jonas   19.0         1     yes"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "exam_data  = {'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'],\n",
    "        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],\n",
    "        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],\n",
    "        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']}\n",
    "labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']\n",
    "\n",
    "df = pd.DataFrame(exam_data , index=labels)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    Anastasia\n",
       "b         Dima\n",
       "c    Katherine\n",
       "d        James\n",
       "e        Emily\n",
       "f      Michael\n",
       "g      Matthew\n",
       "h        Laura\n",
       "i        Kevin\n",
       "j        Jonas\n",
       "Name: name, dtype: object"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = df['name']\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    ANASTASIA\n",
       "b         DIMA\n",
       "c    KATHERINE\n",
       "d        JAMES\n",
       "e        EMILY\n",
       "f      MICHAEL\n",
       "g      MATTHEW\n",
       "h        LAURA\n",
       "i        KEVIN\n",
       "j        JONAS\n",
       "Name: name, dtype: object"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s.str.upper()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    anastasia\n",
       "b         dima\n",
       "c    katherine\n",
       "d        james\n",
       "e        emily\n",
       "f      michael\n",
       "g      matthew\n",
       "h        laura\n",
       "i        kevin\n",
       "j        jonas\n",
       "Name: name, dtype: object"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s.str.lower()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to remove whitespaces, left sided whitespaces and right sided whitespaces of the string values of a given pandas series.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([' Green', 'Black ', ' Red ', 'White', ' Pink '], dtype='object')"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "color1 = pd.Index([' Green', 'Black ', ' Red ', 'White', ' Pink '])\n",
    "color1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Remove whitespace\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Index(['Green', 'Black', 'Red', 'White', 'Pink'], dtype='object')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nRemove whitespace\")\n",
    "color1.str.strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Remove left sided whitespace\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Index(['Green', 'Black ', 'Red ', 'White', 'Pink '], dtype='object')"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nRemove left sided whitespace\")\n",
    "color1.str.lstrip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Remove Right sided whitespace\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Index([' Green', 'Black', ' Red', 'White', ' Pink'], dtype='object')"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nRemove Right sided whitespace\")\n",
    "color1.str.rstrip()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to add leading zeros to the integer column in a pandas series and makes the length of the field to 8 digit."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original dataframe:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>amount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>40000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>500000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   amount\n",
       "0      10\n",
       "1     250\n",
       "2    3000\n",
       "3   40000\n",
       "4  500000"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nums = {'amount': [10, 250, 3000, 40000, 500000]}\n",
    "print(\"Original dataframe:\")\n",
    "df = pd.DataFrame(nums)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>amount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>00000010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>00000250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>00003000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>00040000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>00500000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     amount\n",
       "0  00000010\n",
       "1  00000250\n",
       "2  00003000\n",
       "3  00040000\n",
       "4  00500000"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['amount'] = df['amount'].apply(lambda x: '{0:0>8}'.format(x))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to add leading zeros to the character column in a pandas series and makes the length of the field to 8 digit.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>date_of_birth</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>alberto</td>\n",
       "      <td>17/05/2002</td>\n",
       "      <td>18.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>gino</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>21.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ryan</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Eesha</td>\n",
       "      <td>11/05/2002</td>\n",
       "      <td>22.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>syed</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      name date_of_birth    age\n",
       "0  alberto     17/05/2002  18.5\n",
       "1     gino     16/02/1999  21.2\n",
       "2     ryan     25/09/1998  22.5\n",
       "3    Eesha     11/05/2002  22.0\n",
       "4     syed     15/09/1997  23.0"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'name': ['alberto','gino','ryan', 'Eesha', 'syed'],\n",
    "    'date_of_birth ': ['17/05/2002','16/02/1999','25/09/1998','11/05/2002','15/09/1997'],\n",
    "    'age': [18.5, 21.2, 22.5, 22, 23]\n",
    "})\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "After capitalizing name column:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>date_of_birth</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Alberto</td>\n",
       "      <td>17/05/2002</td>\n",
       "      <td>18.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Gino</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>21.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Ryan</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Eesha</td>\n",
       "      <td>11/05/2002</td>\n",
       "      <td>22.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Syed</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      name date_of_birth    age\n",
       "0  Alberto     17/05/2002  18.5\n",
       "1     Gino     16/02/1999  21.2\n",
       "2     Ryan     25/09/1998  22.5\n",
       "3    Eesha     11/05/2002  22.0\n",
       "4     Syed     15/09/1997  23.0"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nAfter capitalizing name column:\")\n",
    "df['name'] = list(map(lambda x: x.capitalize(), df['name']))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to count of occurrence of a specified substring in a DataFrame column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name_code</th>\n",
       "      <th>date_of_birth</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c001</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>18.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c002</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>21.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c022</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c2002</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>22.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c2222</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  name_code date_of_birth    age\n",
       "0      c001     12/05/2002  18.5\n",
       "1      c002     16/02/1999  21.2\n",
       "2      c022     25/09/1998  22.5\n",
       "3     c2002     12/02/2022  22.0\n",
       "4     c2222     15/09/1997  23.0"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'name_code': ['c001','c002','c022', 'c2002', 'c2222'],\n",
    "    'date_of_birth ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'age': [18.5, 21.2, 22.5, 22, 23]\n",
    "})\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Count occurrence of 2 in date_of_birth column:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name_code</th>\n",
       "      <th>date_of_birth</th>\n",
       "      <th>age</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c001</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>18.5</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c002</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>21.2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c022</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c2002</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>22.0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c2222</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  name_code date_of_birth    age  count\n",
       "0      c001     12/05/2002  18.5      0\n",
       "1      c002     16/02/1999  21.2      1\n",
       "2      c022     25/09/1998  22.5      2\n",
       "3     c2002     12/02/2022  22.0      2\n",
       "4     c2222     15/09/1997  23.0      4"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nCount occurrence of 2 in date_of_birth column:\")\n",
    "df['count'] = list(map(lambda x: x.count(\"2\"), df['name_code']))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to find the index of a substring of DataFrame with beginning and end position."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name_code</th>\n",
       "      <th>date_of_birth</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>18.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1000c</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>21.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>b00c2</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>b2c02</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>22.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c2222</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  name_code date_of_birth    age\n",
       "0     c0001     12/05/2002  18.5\n",
       "1     1000c     16/02/1999  21.2\n",
       "2     b00c2     25/09/1998  22.5\n",
       "3     b2c02     12/02/2022  22.0\n",
       "4     c2222     15/09/1997  23.0"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'name_code': ['c0001','1000c','b00c2', 'b2c02', 'c2222'],\n",
    "    'date_of_birth ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'age': [18.5, 21.2, 22.5, 22, 23]\n",
    "})\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Index of a substring in a specified column of a dataframe:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name_code</th>\n",
       "      <th>date_of_birth</th>\n",
       "      <th>age</th>\n",
       "      <th>Index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>18.5</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1000c</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>21.2</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>b00c2</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>b2c02</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>22.0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c2222</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  name_code date_of_birth    age  Index\n",
       "0     c0001     12/05/2002  18.5      0\n",
       "1     1000c     16/02/1999  21.2      4\n",
       "2     b00c2     25/09/1998  22.5      3\n",
       "3     b2c02     12/02/2022  22.0      2\n",
       "4     c2222     15/09/1997  23.0      0"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nIndex of a substring in a specified column of a dataframe:\")\n",
    "df['Index'] = list(map(lambda x: x.find('c', 0, 5), df['name_code']))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to check whether alpha numeric values present in a given column of a DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name_code</th>\n",
       "      <th>date_of_birth</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Company</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>18.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Company a001</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>21.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Company 123</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1234</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>22.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Company 12</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      name_code date_of_birth    age\n",
       "0       Company     12/05/2002  18.5\n",
       "1  Company a001     16/02/1999  21.2\n",
       "2   Company 123     25/09/1998  22.5\n",
       "3          1234     12/02/2022  22.0\n",
       "4    Company 12     15/09/1997  23.0"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'name_code': ['Company','Company a001','Company 123', '1234', 'Company 12'],\n",
    "    'date_of_birth ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'age': [18.5, 21.2, 22.5, 22, 23]\n",
    "})\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Whether all characters in the string are alphanumeric?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name_code</th>\n",
       "      <th>date_of_birth</th>\n",
       "      <th>age</th>\n",
       "      <th>name_code_is_alphanumeric</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Company</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>18.5</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Company a001</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>21.2</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Company 123</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1234</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>22.0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Company 12</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      name_code date_of_birth    age  name_code_is_alphanumeric\n",
       "0       Company     12/05/2002  18.5                       True\n",
       "1  Company a001     16/02/1999  21.2                      False\n",
       "2   Company 123     25/09/1998  22.5                      False\n",
       "3          1234     12/02/2022  22.0                       True\n",
       "4    Company 12     15/09/1997  23.0                      False"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nWhether all characters in the string are alphanumeric?\")\n",
    "df['name_code_is_alphanumeric'] = list(map(lambda x: x.isalnum(), df['name_code']))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to check whether alphabetic values present in a given column of a DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Company</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Company a001</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Company 123</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>abcd</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Company 12</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   company_code date_of_sale   sale_amount\n",
       "0       Company    12/05/2002      12348.5\n",
       "1  Company a001    16/02/1999     233331.2\n",
       "2   Company 123    25/09/1998         22.5\n",
       "3          abcd    12/02/2022    2566552.0\n",
       "4    Company 12    15/09/1997         23.0"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'company_code': ['Company','Company a001','Company 123', 'abcd', 'Company 12'],\n",
    "    'date_of_sale ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]})\n",
    "\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Whether Alphabetic values present in company_code column?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "      <th>company_code_is_alpha</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Company</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Company a001</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Company 123</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>abcd</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Company 12</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   company_code date_of_sale   sale_amount  company_code_is_alpha\n",
       "0       Company    12/05/2002      12348.5                   True\n",
       "1  Company a001    16/02/1999     233331.2                  False\n",
       "2   Company 123    25/09/1998         22.5                  False\n",
       "3          abcd    12/02/2022    2566552.0                   True\n",
       "4    Company 12    15/09/1997         23.0                  False"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nWhether Alphabetic values present in company_code column?\")\n",
    "df['company_code_is_alpha'] = list(map(lambda x: x.isalpha(), df['company_code']))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to find the index of a given substring of a DataFrame column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name_code</th>\n",
       "      <th>date_of_birth</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c001</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>18.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c002</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>21.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c022</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c2002</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>22.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c2222</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  name_code date_of_birth    age\n",
       "0      c001     12/05/2002  18.5\n",
       "1      c002     16/02/1999  21.2\n",
       "2      c022     25/09/1998  22.5\n",
       "3     c2002     12/02/2022  22.0\n",
       "4     c2222     15/09/1997  23.0"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'name_code': ['c001','c002','c022', 'c2002', 'c2222'],\n",
    "    'date_of_birth ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'age': [18.5, 21.2, 22.5, 22, 23]\n",
    "})\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Count occurrence of 22 in date_of_birth column:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name_code</th>\n",
       "      <th>date_of_birth</th>\n",
       "      <th>age</th>\n",
       "      <th>Index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c001</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>18.5</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c002</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>21.2</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c022</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c2002</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>22.0</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c2222</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  name_code date_of_birth    age  Index\n",
       "0      c001     12/05/2002  18.5     -1\n",
       "1      c002     16/02/1999  21.2     -1\n",
       "2      c022     25/09/1998  22.5      2\n",
       "3     c2002     12/02/2022  22.0     -1\n",
       "4     c2222     15/09/1997  23.0      1"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nCount occurrence of 22 in date_of_birth column:\")\n",
    "df['Index'] = list(map(lambda x: x.find('22'), df['name_code']))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to check whether only numeric values present in a given column of a DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Company</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Company a001</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2055</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>abcd</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>123345</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   company_code date_of_sale   sale_amount\n",
       "0       Company    12/05/2002      12348.5\n",
       "1  Company a001    16/02/1999     233331.2\n",
       "2          2055    25/09/1998         22.5\n",
       "3          abcd    12/02/2022    2566552.0\n",
       "4        123345    15/09/1997         23.0"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'company_code': ['Company','Company a001', '2055', 'abcd', '123345'],\n",
    "    'date_of_sale ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]})\n",
    "\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Numeric values present in company_code column:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "      <th>company_code_is_digit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Company</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Company a001</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2055</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>abcd</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>123345</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   company_code date_of_sale   sale_amount  company_code_is_digit\n",
       "0       Company    12/05/2002      12348.5                  False\n",
       "1  Company a001    16/02/1999     233331.2                  False\n",
       "2          2055    25/09/1998         22.5                   True\n",
       "3          abcd    12/02/2022    2566552.0                  False\n",
       "4        123345    15/09/1997         23.0                   True"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nNumeric values present in company_code column:\")\n",
    "df['company_code_is_digit'] = list(map(lambda x: x.isdigit(), df['company_code']))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to check whether only lower case or upper case is present in a given column of a DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ABCD</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>hhhh</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>abcd</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>EAWQaaa</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale   sale_amount\n",
       "0         ABCD    12/05/2002      12348.5\n",
       "1         EFGF    16/02/1999     233331.2\n",
       "2         hhhh    25/09/1998         22.5\n",
       "3         abcd    12/02/2022    2566552.0\n",
       "4      EAWQaaa    15/09/1997         23.0"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'company_code': ['ABCD','EFGF', 'hhhh', 'abcd', 'EAWQaaa'],\n",
    "    'date_of_sale ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Is lower (company_code)?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "      <th>company_code_ul_cases</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ABCD</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>hhhh</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>abcd</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>EAWQaaa</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale   sale_amount  company_code_ul_cases\n",
       "0         ABCD    12/05/2002      12348.5                  False\n",
       "1         EFGF    16/02/1999     233331.2                  False\n",
       "2         hhhh    25/09/1998         22.5                   True\n",
       "3         abcd    12/02/2022    2566552.0                   True\n",
       "4      EAWQaaa    15/09/1997         23.0                  False"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nIs lower (company_code)?\")\n",
    "df['company_code_ul_cases'] = list(map(lambda x: x.islower(), df['company_code']))\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Is Upper (company_code)?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "      <th>company_code_ul_cases</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ABCD</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>hhhh</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>abcd</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>EAWQaaa</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale   sale_amount  company_code_ul_cases\n",
       "0         ABCD    12/05/2002      12348.5                   True\n",
       "1         EFGF    16/02/1999     233331.2                   True\n",
       "2         hhhh    25/09/1998         22.5                  False\n",
       "3         abcd    12/02/2022    2566552.0                  False\n",
       "4      EAWQaaa    15/09/1997         23.0                  False"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nIs Upper (company_code)?\")\n",
    "df['company_code_ul_cases'] = list(map(lambda x: x.isupper(), df['company_code']))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to check whether only proper case or title case is present in a given column of a DataFrame.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Hhhh</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>abcd</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>EAWQaaa</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale   sale_amount\n",
       "0         Abcd    12/05/2002      12348.5\n",
       "1         EFGF    16/02/1999     233331.2\n",
       "2         Hhhh    25/09/1998         22.5\n",
       "3         abcd    12/02/2022    2566552.0\n",
       "4      EAWQaaa    15/09/1997         23.0"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'company_code': ['Abcd','EFGF', 'Hhhh', 'abcd', 'EAWQaaa'],\n",
    "    'date_of_sale ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]})\n",
    "\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Is proper case or title case?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "      <th>company_code_is_title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Hhhh</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>abcd</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>EAWQaaa</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale   sale_amount  company_code_is_title\n",
       "0         Abcd    12/05/2002      12348.5                   True\n",
       "1         EFGF    16/02/1999     233331.2                  False\n",
       "2         Hhhh    25/09/1998         22.5                   True\n",
       "3         abcd    12/02/2022    2566552.0                  False\n",
       "4      EAWQaaa    15/09/1997         23.0                  False"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nIs proper case or title case?\")\n",
    "df['company_code_is_title'] = list(map(lambda x: x.istitle(), df['company_code']))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to check whether only space is present in a given column of a DataFrame.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>abcd</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale   sale_amount\n",
       "0         Abcd    12/05/2002      12348.5\n",
       "1        EFGF     16/02/1999     233331.2\n",
       "2                 25/09/1998         22.5\n",
       "3         abcd    12/02/2022    2566552.0\n",
       "4                 15/09/1997         23.0"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'company_code': ['Abcd','EFGF ', '  ', 'abcd', ' '],\n",
    "    'date_of_sale ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]})\n",
    "\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Is space is present?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "      <th>company_code_is_title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>abcd</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale   sale_amount  company_code_is_title\n",
       "0         Abcd    12/05/2002      12348.5                  False\n",
       "1        EFGF     16/02/1999     233331.2                  False\n",
       "2                 25/09/1998         22.5                   True\n",
       "3         abcd    12/02/2022    2566552.0                  False\n",
       "4                 15/09/1997         23.0                   True"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nIs space is present?\")\n",
    "df['company_code_is_title'] = list(map(lambda x: x.isspace(), df['company_code']))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to get the length of the string present of a given column in a DataFrame.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>skfsalf</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>safsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale   sale_amount\n",
       "0         Abcd    12/05/2002      12348.5\n",
       "1         EFGF    16/02/1999     233331.2\n",
       "2      skfsalf    25/09/1998         22.5\n",
       "3      sdfslew    12/02/2022    2566552.0\n",
       "4       safsdf    15/09/1997         23.0"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'company_code': ['Abcd','EFGF', 'skfsalf', 'sdfslew', 'safsdf'],\n",
    "    'date_of_sale ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]})\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Length of the string in a column:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "      <th>company_code_length</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>skfsalf</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>safsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale   sale_amount  company_code_length\n",
       "0         Abcd    12/05/2002      12348.5                    4\n",
       "1         EFGF    16/02/1999     233331.2                    4\n",
       "2      skfsalf    25/09/1998         22.5                    7\n",
       "3      sdfslew    12/02/2022    2566552.0                    7\n",
       "4       safsdf    15/09/1997         23.0                    6"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nLength of the string in a column:\")\n",
    "df['company_code_length'] = df['company_code'].apply(len)\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to get the length of the integer of a given column in a DataFrame.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>skfsalf</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>safsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale  sale_amount\n",
       "0         Abcd   12/05/2002      12348.5\n",
       "1         EFGF   16/02/1999     233331.2\n",
       "2      skfsalf   25/09/1998         22.5\n",
       "3      sdfslew   12/02/2022    2566552.0\n",
       "4       safsdf   15/09/1997         23.0"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'company_code': ['Abcd','EFGF', 'skfsalf', 'sdfslew', 'safsdf'],\n",
    "    'date_of_sale': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]\n",
    "})\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Length of sale_amount:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "      <th>sale_amount_length</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>skfsalf</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>safsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale  sale_amount  sale_amount_length\n",
       "0         Abcd   12/05/2002      12348.5                   7\n",
       "1         EFGF   16/02/1999     233331.2                   8\n",
       "2      skfsalf   25/09/1998         22.5                   4\n",
       "3      sdfslew   12/02/2022    2566552.0                   9\n",
       "4       safsdf   15/09/1997         23.0                   4"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nLength of sale_amount:\")\n",
    "df['sale_amount_length'] = df['sale_amount'].map(str).apply(len)\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to check if a specified column starts with a specified string in a DataFrame.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>zefsalf</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>zekfsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale  sale_amount\n",
       "0         Abcd   12/05/2002      12348.5\n",
       "1         EFGF   16/02/1999     233331.2\n",
       "2      zefsalf   25/09/1998         22.5\n",
       "3      sdfslew   12/02/2022    2566552.0\n",
       "4      zekfsdf   15/09/1997         23.0"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'company_code': ['Abcd','EFGF', 'zefsalf', 'sdfslew', 'zekfsdf'],\n",
    "    'date_of_sale': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]\n",
    "})\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "If a specified column starts with a specified string?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "      <th>company_code_starts_with</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>zefsalf</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>zekfsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale  sale_amount  company_code_starts_with\n",
       "0         Abcd   12/05/2002      12348.5                     False\n",
       "1         EFGF   16/02/1999     233331.2                     False\n",
       "2      zefsalf   25/09/1998         22.5                      True\n",
       "3      sdfslew   12/02/2022    2566552.0                     False\n",
       "4      zekfsdf   15/09/1997         23.0                      True"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nIf a specified column starts with a specified string?\")\n",
    "df['company_code_starts_with'] = list(\n",
    "    map(lambda x: x.startswith('ze'), df['company_code']))\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to swap the cases of a specified character column in a given DataFrame.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>zefsalf</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>zekfsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale  sale_amount\n",
       "0         Abcd   12/05/2002      12348.5\n",
       "1         EFGF   16/02/1999     233331.2\n",
       "2      zefsalf   25/09/1998         22.5\n",
       "3      sdfslew   12/02/2022    2566552.0\n",
       "4      zekfsdf   15/09/1997         23.0"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'company_code': ['Abcd','EFGF', 'zefsalf', 'sdfslew', 'zekfsdf'],\n",
    "    'date_of_sale': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]\n",
    "})\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "      <th>swapped_company_code</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "      <td>aBCD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "      <td>efgf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>zefsalf</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>ZEFSALF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "      <td>SDFSLEW</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>zekfsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>ZEKFSDF</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale  sale_amount swapped_company_code\n",
       "0         Abcd   12/05/2002      12348.5                 aBCD\n",
       "1         EFGF   16/02/1999     233331.2                 efgf\n",
       "2      zefsalf   25/09/1998         22.5              ZEFSALF\n",
       "3      sdfslew   12/02/2022    2566552.0              SDFSLEW\n",
       "4      zekfsdf   15/09/1997         23.0              ZEKFSDF"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['swapped_company_code'] = list(map(lambda x: x.swapcase(), df['company_code']))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to convert a specified character column in upper/lower cases in a given DataFrame.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>zefsalf</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>zekfsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale  sale_amount\n",
       "0         Abcd   12/05/2002      12348.5\n",
       "1         EFGF   16/02/1999     233331.2\n",
       "2      zefsalf   25/09/1998         22.5\n",
       "3      sdfslew   12/02/2022    2566552.0\n",
       "4      zekfsdf   15/09/1997         23.0"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'company_code': ['Abcd','EFGF', 'zefsalf', 'sdfslew', 'zekfsdf'],\n",
    "    'date_of_sale': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]\n",
    "})\n",
    "\n",
    "df1 = pd.DataFrame({\n",
    "    'company_code': ['Abcd','EFGF', 'zefsalf', 'sdfslew', 'zekfsdf'],\n",
    "    'date_of_sale': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]\n",
    "})\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "      <th>upper_company_code</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "      <td>ABCD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "      <td>EFGF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>zefsalf</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>ZEFSALF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "      <td>SDFSLEW</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>zekfsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>ZEKFSDF</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale  sale_amount upper_company_code\n",
       "0         Abcd   12/05/2002      12348.5               ABCD\n",
       "1         EFGF   16/02/1999     233331.2               EFGF\n",
       "2      zefsalf   25/09/1998         22.5            ZEFSALF\n",
       "3      sdfslew   12/02/2022    2566552.0            SDFSLEW\n",
       "4      zekfsdf   15/09/1997         23.0            ZEKFSDF"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['upper_company_code'] = list(map(lambda x: x.upper(), df['company_code']))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to convert a specified character column in title case in a given DataFrame.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>zefsalf</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>zekfsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale  sale_amount\n",
       "0         Abcd   12/05/2002      12348.5\n",
       "1         EFGF   16/02/1999     233331.2\n",
       "2      zefsalf   25/09/1998         22.5\n",
       "3      sdfslew   12/02/2022    2566552.0\n",
       "4      zekfsdf   15/09/1997         23.0"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'company_code': ['Abcd','EFGF', 'zefsalf', 'sdfslew', 'zekfsdf'],\n",
    "    'date_of_sale': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]\n",
    "})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "      <th>company_code_title_cases</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "      <td>Abcd</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "      <td>Efgf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>zefsalf</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>Zefsalf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "      <td>Sdfslew</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>zekfsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>Zekfsdf</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale  sale_amount company_code_title_cases\n",
       "0         Abcd   12/05/2002      12348.5                     Abcd\n",
       "1         EFGF   16/02/1999     233331.2                     Efgf\n",
       "2      zefsalf   25/09/1998         22.5                  Zefsalf\n",
       "3      sdfslew   12/02/2022    2566552.0                  Sdfslew\n",
       "4      zekfsdf   15/09/1997         23.0                  Zekfsdf"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['company_code_title_cases'] = list(map(lambda x: x.title(), df['company_code']))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to replace arbitrary values with other values in a given DataFrame.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>D</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale  sale_amount\n",
       "0            A   12/05/2002      12348.5\n",
       "1            B   16/02/1999     233331.2\n",
       "2            C   25/09/1998         22.5\n",
       "3            D   12/02/2022    2566552.0\n",
       "4            A   15/09/1997         23.0"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'company_code': ['A','B', 'C', 'D', 'A'],\n",
    "    'date_of_sale': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],\n",
    "    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]\n",
    "})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Replace A with c:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>D</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>C</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale  sale_amount\n",
       "0            C   12/05/2002      12348.5\n",
       "1            B   16/02/1999     233331.2\n",
       "2            C   25/09/1998         22.5\n",
       "3            D   12/02/2022    2566552.0\n",
       "4            C   15/09/1997         23.0"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nReplace A with c:\")\n",
    "df = df.replace(\"A\", \"C\")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to split a string of a column of a given DataFrame into multiple columns.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>date_of_birth</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Alberto  Franco</td>\n",
       "      <td>17/05/2002</td>\n",
       "      <td>18.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Gino Ann Mcneill</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>21.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Ryan  Parkes</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Eesha Artur Hinton</td>\n",
       "      <td>11/05/2002</td>\n",
       "      <td>22.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Syed  Wharton</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 name date_of_birth    age\n",
       "0     Alberto  Franco     17/05/2002  18.5\n",
       "1    Gino Ann Mcneill     16/02/1999  21.2\n",
       "2        Ryan  Parkes     25/09/1998  22.5\n",
       "3  Eesha Artur Hinton     11/05/2002  22.0\n",
       "4       Syed  Wharton     15/09/1997  23.0"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'name': ['Alberto  Franco','Gino Ann Mcneill','Ryan  Parkes', 'Eesha Artur Hinton', 'Syed  Wharton'],\n",
    "    'date_of_birth ': ['17/05/2002','16/02/1999','25/09/1998','11/05/2002','15/09/1997'],\n",
    "    'age': [18.5, 21.2, 22.5, 22, 23]\n",
    "})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "New DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>date_of_birth</th>\n",
       "      <th>age</th>\n",
       "      <th>first</th>\n",
       "      <th>middle</th>\n",
       "      <th>last</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Alberto  Franco</td>\n",
       "      <td>17/05/2002</td>\n",
       "      <td>18.5</td>\n",
       "      <td>Alberto</td>\n",
       "      <td></td>\n",
       "      <td>Franco</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Gino Ann Mcneill</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>21.2</td>\n",
       "      <td>Gino</td>\n",
       "      <td>Ann</td>\n",
       "      <td>Mcneill</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Ryan  Parkes</td>\n",
       "      <td>25/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>Ryan</td>\n",
       "      <td></td>\n",
       "      <td>Parkes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Eesha Artur Hinton</td>\n",
       "      <td>11/05/2002</td>\n",
       "      <td>22.0</td>\n",
       "      <td>Eesha</td>\n",
       "      <td>Artur</td>\n",
       "      <td>Hinton</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Syed  Wharton</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>Syed</td>\n",
       "      <td></td>\n",
       "      <td>Wharton</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 name date_of_birth    age    first middle     last\n",
       "0     Alberto  Franco     17/05/2002  18.5  Alberto          Franco\n",
       "1    Gino Ann Mcneill     16/02/1999  21.2     Gino    Ann  Mcneill\n",
       "2        Ryan  Parkes     25/09/1998  22.5     Ryan          Parkes\n",
       "3  Eesha Artur Hinton     11/05/2002  22.0    Eesha  Artur   Hinton\n",
       "4       Syed  Wharton     15/09/1997  23.0     Syed         Wharton"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[[\"first\", \"middle\", \"last\"]] = df[\"name\"].str.split(\" \", expand = True)\n",
    "print(\"\\nNew DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to extract email from a specified column of string type of a given DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name_email</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Alberto Franco af@gmail.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Gino Mcneill gm@yahoo.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Ryan Parkes rp@abc.io</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Eesha Hinton</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Gino Mcneill gm@github.com</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    name_email\n",
       "0  Alberto Franco af@gmail.com\n",
       "1    Gino Mcneill gm@yahoo.com\n",
       "2        Ryan Parkes rp@abc.io\n",
       "3                 Eesha Hinton\n",
       "4   Gino Mcneill gm@github.com"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import re as re\n",
    "pd.set_option('display.max_columns', 10)\n",
    "df = pd.DataFrame({\n",
    "    'name_email': ['Alberto Franco af@gmail.com','Gino Mcneill gm@yahoo.com','Ryan Parkes rp@abc.io', 'Eesha Hinton', 'Gino Mcneill gm@github.com']\n",
    "    })\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_email(text):\n",
    "    email = re.findall(r'[\\w\\.-]+@[\\w\\.-]+',str(text))\n",
    "    return \",\".join(email)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name_email</th>\n",
       "      <th>email</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Alberto Franco af@gmail.com</td>\n",
       "      <td>af@gmail.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Gino Mcneill gm@yahoo.com</td>\n",
       "      <td>gm@yahoo.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Ryan Parkes rp@abc.io</td>\n",
       "      <td>rp@abc.io</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Eesha Hinton</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Gino Mcneill gm@github.com</td>\n",
       "      <td>gm@github.com</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    name_email          email\n",
       "0  Alberto Franco af@gmail.com   af@gmail.com\n",
       "1    Gino Mcneill gm@yahoo.com   gm@yahoo.com\n",
       "2        Ryan Parkes rp@abc.io      rp@abc.io\n",
       "3                 Eesha Hinton               \n",
       "4   Gino Mcneill gm@github.com  gm@github.com"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['email']=df['name_email'].apply(lambda x: find_email(x))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to extract hash attached word from twitter text from the specified column of a given DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tweets</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>#Obama says goodbye</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Retweets for #cash</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A political endorsement in #Indonesia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1 dog = many #retweets</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Just a simple #egg</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  tweets\n",
       "0                    #Obama says goodbye\n",
       "1                     Retweets for #cash\n",
       "2  A political endorsement in #Indonesia\n",
       "3                 1 dog = many #retweets\n",
       "4                     Just a simple #egg"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.max_columns', 10)\n",
    "df = pd.DataFrame({\n",
    "    'tweets': ['#Obama says goodbye','Retweets for #cash','A political endorsement in #Indonesia', '1 dog = many #retweets', 'Just a simple #egg']\n",
    "    })\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_hash(text):\n",
    "    hword=re.findall(r'(?<=#)\\w+',text)\n",
    "    return \" \".join(hword)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tweets</th>\n",
       "      <th>hash_word</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>#Obama says goodbye</td>\n",
       "      <td>Obama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Retweets for #cash</td>\n",
       "      <td>cash</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A political endorsement in #Indonesia</td>\n",
       "      <td>Indonesia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1 dog = many #retweets</td>\n",
       "      <td>retweets</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Just a simple #egg</td>\n",
       "      <td>egg</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  tweets  hash_word\n",
       "0                    #Obama says goodbye      Obama\n",
       "1                     Retweets for #cash       cash\n",
       "2  A political endorsement in #Indonesia  Indonesia\n",
       "3                 1 dog = many #retweets   retweets\n",
       "4                     Just a simple #egg        egg"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['hash_word']=df['tweets'].apply(lambda x: find_hash(x))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to extract word mention someone in tweets using @ from the specified column of a given DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tweets</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>@Obama says goodbye</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Retweets for @cash</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A political endorsement in @Indonesia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1 dog = many #retweets</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Just a simple #egg</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  tweets\n",
       "0                    @Obama says goodbye\n",
       "1                     Retweets for @cash\n",
       "2  A political endorsement in @Indonesia\n",
       "3                 1 dog = many #retweets\n",
       "4                     Just a simple #egg"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.max_columns', 10)\n",
    "df = pd.DataFrame({\n",
    "    'tweets': ['@Obama says goodbye','Retweets for @cash','A political endorsement in @Indonesia', '1 dog = many #retweets', 'Just a simple #egg']\n",
    "    })\n",
    "print(\"Original DataFrame:\")\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_at_word(text):\n",
    "    word=re.findall(r'(?<=@)\\w+',text)\n",
    "    return \" \".join(word)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tweets</th>\n",
       "      <th>at_word</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>@Obama says goodbye</td>\n",
       "      <td>Obama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Retweets for @cash</td>\n",
       "      <td>cash</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A political endorsement in @Indonesia</td>\n",
       "      <td>Indonesia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1 dog = many #retweets</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Just a simple #egg</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  tweets    at_word\n",
       "0                    @Obama says goodbye      Obama\n",
       "1                     Retweets for @cash       cash\n",
       "2  A political endorsement in @Indonesia  Indonesia\n",
       "3                 1 dog = many #retweets           \n",
       "4                     Just a simple #egg           "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['at_word']=df['tweets'].apply(lambda x: find_at_word(x))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to extract only number from the specified column of a given DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>address</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001</td>\n",
       "      <td>7277 Surrey Ave.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c0002</td>\n",
       "      <td>920 N. Bishop Ave.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c0003</td>\n",
       "      <td>9910 Golden Star St.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003</td>\n",
       "      <td>25 Dunbar St.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c0004</td>\n",
       "      <td>17 West Livingston Court</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code                   address\n",
       "0        c0001          7277 Surrey Ave.\n",
       "1        c0002        920 N. Bishop Ave.\n",
       "2        c0003      9910 Golden Star St.\n",
       "3        c0003             25 Dunbar St.\n",
       "4        c0004  17 West Livingston Court"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.max_columns', 10)\n",
    "df = pd.DataFrame({\n",
    "    'company_code': ['c0001','c0002','c0003', 'c0003', 'c0004'],\n",
    "    'address': ['7277 Surrey Ave.','920 N. Bishop Ave.','9910 Golden Star St.', '25 Dunbar St.', '17 West Livingston Court']\n",
    "    })\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_number(text):\n",
    "    num = re.findall(r'[0-9]+',text)\n",
    "    return \" \".join(num)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>address</th>\n",
       "      <th>number</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001</td>\n",
       "      <td>7277 Surrey Ave.</td>\n",
       "      <td>7277</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c0002</td>\n",
       "      <td>920 N. Bishop Ave.</td>\n",
       "      <td>920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c0003</td>\n",
       "      <td>9910 Golden Star St.</td>\n",
       "      <td>9910</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003</td>\n",
       "      <td>25 Dunbar St.</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c0004</td>\n",
       "      <td>17 West Livingston Court</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code                   address number\n",
       "0        c0001          7277 Surrey Ave.   7277\n",
       "1        c0002        920 N. Bishop Ave.    920\n",
       "2        c0003      9910 Golden Star St.   9910\n",
       "3        c0003             25 Dunbar St.     25\n",
       "4        c0004  17 West Livingston Court     17"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['number']=df['address'].apply(lambda x: find_number(x))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to extract only phone number from the specified column of a given DataFrame.4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>company_phone_no</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001</td>\n",
       "      <td>Company1-Phone no. 4695168357</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c0002</td>\n",
       "      <td>Company2-Phone no. 8088729013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c0003</td>\n",
       "      <td>Company3-Phone no. 6204658086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003</td>\n",
       "      <td>Company4-Phone no. 5159530096</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c0004</td>\n",
       "      <td>Company5-Phone no. 9037952371</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code               company_phone_no\n",
       "0        c0001  Company1-Phone no. 4695168357\n",
       "1        c0002  Company2-Phone no. 8088729013\n",
       "2        c0003  Company3-Phone no. 6204658086\n",
       "3        c0003  Company4-Phone no. 5159530096\n",
       "4        c0004  Company5-Phone no. 9037952371"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.max_columns', 10)\n",
    "df = pd.DataFrame({\n",
    "    'company_code': ['c0001','c0002','c0003', 'c0003', 'c0004'],\n",
    "    'company_phone_no': ['Company1-Phone no. 4695168357','Company2-Phone no. 8088729013','Company3-Phone no. 6204658086', 'Company4-Phone no. 5159530096', 'Company5-Phone no. 9037952371']\n",
    "    })\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_phone_number(text):\n",
    "    ph_no = re.findall(r\"\\b\\d{10}\\b\",text)\n",
    "    return \"\".join(ph_no)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>company_phone_no</th>\n",
       "      <th>number</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001</td>\n",
       "      <td>Company1-Phone no. 4695168357</td>\n",
       "      <td>4695168357</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c0002</td>\n",
       "      <td>Company2-Phone no. 8088729013</td>\n",
       "      <td>8088729013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c0003</td>\n",
       "      <td>Company3-Phone no. 6204658086</td>\n",
       "      <td>6204658086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003</td>\n",
       "      <td>Company4-Phone no. 5159530096</td>\n",
       "      <td>5159530096</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c0004</td>\n",
       "      <td>Company5-Phone no. 9037952371</td>\n",
       "      <td>9037952371</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code               company_phone_no      number\n",
       "0        c0001  Company1-Phone no. 4695168357  4695168357\n",
       "1        c0002  Company2-Phone no. 8088729013  8088729013\n",
       "2        c0003  Company3-Phone no. 6204658086  6204658086\n",
       "3        c0003  Company4-Phone no. 5159530096  5159530096\n",
       "4        c0004  Company5-Phone no. 9037952371  9037952371"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['number']=df['company_phone_no'].apply(lambda x: find_phone_number(x))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to extract year between 1800 to 2200 from the specified column of a given DataFrame.4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001</td>\n",
       "      <td>year 1800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c0002</td>\n",
       "      <td>year 1700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c0003</td>\n",
       "      <td>year 2300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003</td>\n",
       "      <td>year 1900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c0004</td>\n",
       "      <td>year 2200</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code       year\n",
       "0        c0001  year 1800\n",
       "1        c0002  year 1700\n",
       "2        c0003  year 2300\n",
       "3        c0003  year 1900\n",
       "4        c0004  year 2200"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.max_columns', 10)\n",
    "df = pd.DataFrame({\n",
    "    'company_code': ['c0001','c0002','c0003', 'c0003', 'c0004'],\n",
    "    'year': ['year 1800','year 1700','year 2300', 'year 1900', 'year 2200']\n",
    "    })\n",
    "print(\"Original DataFrame:\")\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>year</th>\n",
       "      <th>year_range</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001</td>\n",
       "      <td>year 1800</td>\n",
       "      <td>[1800]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c0002</td>\n",
       "      <td>year 1700</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c0003</td>\n",
       "      <td>year 2300</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003</td>\n",
       "      <td>year 1900</td>\n",
       "      <td>[1900]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c0004</td>\n",
       "      <td>year 2200</td>\n",
       "      <td>[2200]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code       year year_range\n",
       "0        c0001  year 1800     [1800]\n",
       "1        c0002  year 1700         []\n",
       "2        c0003  year 2300         []\n",
       "3        c0003  year 1900     [1900]\n",
       "4        c0004  year 2200     [2200]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def find_year(text):\n",
    "    #line=re.findall(r\"\\b(18[0][0]|2[0-2][00])\\b\",text)\n",
    "    result = re.findall(r\"\\b(18[0-9]{2}|19[0-8][0-9]|199[0-9]|2[01][0-9]{2}|2200)\\b\",text)\n",
    "    return result\n",
    "\n",
    "df['year_range']=df['year'].apply(lambda x: find_year(x))\n",
    "\n",
    "df\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>year</th>\n",
       "      <th>year_range</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001</td>\n",
       "      <td>year 1800</td>\n",
       "      <td>1800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c0002</td>\n",
       "      <td>year 1700</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c0003</td>\n",
       "      <td>year 2300</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003</td>\n",
       "      <td>year 1900</td>\n",
       "      <td>1900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c0004</td>\n",
       "      <td>year 2200</td>\n",
       "      <td>2200</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code       year year_range\n",
       "0        c0001  year 1800       1800\n",
       "1        c0002  year 1700           \n",
       "2        c0003  year 2300           \n",
       "3        c0003  year 1900       1900\n",
       "4        c0004  year 2200       2200"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['year_range']=df['year_range'].apply(lambda x:''.join(x))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to extract only non alphanumeric characters from the specified column of a given DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001#</td>\n",
       "      <td>year 1800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c00@0^2</td>\n",
       "      <td>year 1700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>$c0003</td>\n",
       "      <td>year 2300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003</td>\n",
       "      <td>year 1900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>&amp;c0004</td>\n",
       "      <td>year 2200</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code       year\n",
       "0       c0001#  year 1800\n",
       "1      c00@0^2  year 1700\n",
       "2       $c0003  year 2300\n",
       "3        c0003  year 1900\n",
       "4       &c0004  year 2200"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.max_columns', 10)\n",
    "df = pd.DataFrame({\n",
    "    'company_code': ['c0001#','c00@0^2','$c0003', 'c0003', '&c0004'],\n",
    "    'year': ['year 1800','year 1700','year 2300', 'year 1900', 'year 2200']\n",
    "    })\n",
    "print(\"Original DataFrame:\")\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_nonalpha(text):\n",
    "    result = re.findall(\"[^A-Za-z0-9 ]\",text)\n",
    "    return result\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Extracting only non alphanumeric characters from company_code:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>year</th>\n",
       "      <th>nonalpha</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001#</td>\n",
       "      <td>year 1800</td>\n",
       "      <td>[#]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c00@0^2</td>\n",
       "      <td>year 1700</td>\n",
       "      <td>[@, ^]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>$c0003</td>\n",
       "      <td>year 2300</td>\n",
       "      <td>[$]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003</td>\n",
       "      <td>year 1900</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>&amp;c0004</td>\n",
       "      <td>year 2200</td>\n",
       "      <td>[&amp;]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code       year nonalpha\n",
       "0       c0001#  year 1800      [#]\n",
       "1      c00@0^2  year 1700   [@, ^]\n",
       "2       $c0003  year 2300      [$]\n",
       "3        c0003  year 1900       []\n",
       "4       &c0004  year 2200      [&]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['nonalpha']=df['company_code'].apply(lambda x: find_nonalpha(x))\n",
    "print(\"\\nExtracting only non alphanumeric characters from company_code:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to extract only punctuations from the specified column of a given DataFrame.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001.</td>\n",
       "      <td>year 1800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c000,2</td>\n",
       "      <td>year 1700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c0003</td>\n",
       "      <td>year 2300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003#</td>\n",
       "      <td>year 1900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c0004,</td>\n",
       "      <td>year 2200</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code       year\n",
       "0       c0001.  year 1800\n",
       "1       c000,2  year 1700\n",
       "2        c0003  year 2300\n",
       "3       c0003#  year 1900\n",
       "4       c0004,  year 2200"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.max_columns', 10)\n",
    "df = pd.DataFrame({\n",
    "    'company_code': ['c0001.','c000,2','c0003', 'c0003#', 'c0004,'],\n",
    "    'year': ['year 1800','year 1700','year 2300', 'year 1900', 'year 2200']\n",
    "    })\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_punctuations(text):\n",
    "    result = re.findall(r'[!\"\\$%&\\'()*+,\\-.\\/:;=#@?\\[\\\\\\]^_`{|}~]*', text)\n",
    "    string=\"\".join(result)\n",
    "    return list(string)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Extracting punctuation:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>year</th>\n",
       "      <th>nonalpha</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001.</td>\n",
       "      <td>year 1800</td>\n",
       "      <td>[.]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c000,2</td>\n",
       "      <td>year 1700</td>\n",
       "      <td>[,]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c0003</td>\n",
       "      <td>year 2300</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003#</td>\n",
       "      <td>year 1900</td>\n",
       "      <td>[#]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c0004,</td>\n",
       "      <td>year 2200</td>\n",
       "      <td>[,]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code       year nonalpha\n",
       "0       c0001.  year 1800      [.]\n",
       "1       c000,2  year 1700      [,]\n",
       "2        c0003  year 2300       []\n",
       "3       c0003#  year 1900      [#]\n",
       "4       c0004,  year 2200      [,]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['nonalpha']=df['company_code'].apply(lambda x: find_punctuations(x))\n",
    "print(\"\\nExtracting punctuation:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to remove repetitive characters from the specified column of a given DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>text_code</th>\n",
       "      <th>text_lang</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>t0001.</td>\n",
       "      <td>She livedd a long life.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>t0002</td>\n",
       "      <td>How oold is your father?</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>t0003</td>\n",
       "      <td>What is tthe problem?</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>t0004</td>\n",
       "      <td>TThhis desk is used by Tom.</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  text_code                    text_lang\n",
       "0    t0001.      She livedd a long life.\n",
       "1     t0002     How oold is your father?\n",
       "2     t0003        What is tthe problem?\n",
       "3     t0004  TThhis desk is used by Tom."
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.max_columns', 10)\n",
    "df = pd.DataFrame({\n",
    "    'text_code': ['t0001.','t0002','t0003', 't0004'],\n",
    "    'text_lang': ['She livedd a long life.', 'How oold is your father?', 'What is tthe problem?','TThhis desk is used by Tom.']\n",
    "    })\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rep_char(str1):\n",
    "    tchr = str1.group(0)\n",
    "    if len(tchr) > 1:\n",
    "        return tchr[0:1] # can change the value here on repetition\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "def unique_char(rep, sent_text):\n",
    "    convert = re.sub(r'(\\w)\\1+', rep, sent_text) \n",
    "    return convert\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Remove repetitive characters:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>text_code</th>\n",
       "      <th>text_lang</th>\n",
       "      <th>normal_text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>t0001.</td>\n",
       "      <td>She livedd a long life.</td>\n",
       "      <td>She lived a long life.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>t0002</td>\n",
       "      <td>How oold is your father?</td>\n",
       "      <td>How old is your father?</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>t0003</td>\n",
       "      <td>What is tthe problem?</td>\n",
       "      <td>What is the problem?</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>t0004</td>\n",
       "      <td>TThhis desk is used by Tom.</td>\n",
       "      <td>This desk is used by Tom.</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  text_code                    text_lang                normal_text\n",
       "0    t0001.      She livedd a long life.     She lived a long life.\n",
       "1     t0002     How oold is your father?    How old is your father?\n",
       "2     t0003        What is tthe problem?       What is the problem?\n",
       "3     t0004  TThhis desk is used by Tom.  This desk is used by Tom."
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['normal_text']=df['text_lang'].apply(lambda x : unique_char(rep_char,x))\n",
    "print(\"\\nRemove repetitive characters:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to extract numbers greater than 940 from the specified column of a given DataFrame.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>address</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001</td>\n",
       "      <td>7277 Surrey Ave.1111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c0002</td>\n",
       "      <td>920 N. Bishop Ave.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c0003</td>\n",
       "      <td>9910 Golden Star St.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003</td>\n",
       "      <td>1025 Dunbar St.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c0004</td>\n",
       "      <td>1700 West Livingston Court</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code                     address\n",
       "0        c0001        7277 Surrey Ave.1111\n",
       "1        c0002          920 N. Bishop Ave.\n",
       "2        c0003        9910 Golden Star St.\n",
       "3        c0003             1025 Dunbar St.\n",
       "4        c0004  1700 West Livingston Court"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.max_columns', 10)\n",
    "df = pd.DataFrame({\n",
    "    'company_code': ['c0001','c0002','c0003', 'c0003', 'c0004'],\n",
    "    'address': ['7277 Surrey Ave.1111','920 N. Bishop Ave.','9910 Golden Star St.', '1025 Dunbar St.', '1700 West Livingston Court']\n",
    "    })\n",
    "print(\"Original DataFrame:\")\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "def test_num_great(text): \n",
    "    result = re.findall(r'95[5-9]|9[6-9]\\d|[1-9]\\d{3,}',text)\n",
    "    return \" \".join(result)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Number greater than 940:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>address</th>\n",
       "      <th>num_great</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001</td>\n",
       "      <td>7277 Surrey Ave.1111</td>\n",
       "      <td>7277 1111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c0002</td>\n",
       "      <td>920 N. Bishop Ave.</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c0003</td>\n",
       "      <td>9910 Golden Star St.</td>\n",
       "      <td>991</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003</td>\n",
       "      <td>1025 Dunbar St.</td>\n",
       "      <td>1025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c0004</td>\n",
       "      <td>1700 West Livingston Court</td>\n",
       "      <td>1700</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code                     address  num_great\n",
       "0        c0001        7277 Surrey Ave.1111  7277 1111\n",
       "1        c0002          920 N. Bishop Ave.           \n",
       "2        c0003        9910 Golden Star St.        991\n",
       "3        c0003             1025 Dunbar St.       1025\n",
       "4        c0004  1700 West Livingston Court       1700"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['num_great']=df['address'].apply(lambda x : test_num_great(x))\n",
    "print(\"\\nNumber greater than 940:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to extract numbers less than 100 from the specified column of a given DataFrame.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>address</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001</td>\n",
       "      <td>72 Surrey Ave.11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c0002</td>\n",
       "      <td>92 N. Bishop Ave.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c0003</td>\n",
       "      <td>9910 Golden Star St.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003</td>\n",
       "      <td>102 Dunbar St.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c0004</td>\n",
       "      <td>17 West Livingston Court</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code                   address\n",
       "0        c0001          72 Surrey Ave.11\n",
       "1        c0002         92 N. Bishop Ave.\n",
       "2        c0003      9910 Golden Star St.\n",
       "3        c0003            102 Dunbar St.\n",
       "4        c0004  17 West Livingston Court"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.max_columns', 10)\n",
    "df = pd.DataFrame({\n",
    "    'company_code': ['c0001','c0002','c0003', 'c0003', 'c0004'],\n",
    "    'address': ['72 Surrey Ave.11','92 N. Bishop Ave.','9910 Golden Star St.', '102 Dunbar St.', '17 West Livingston Court']\n",
    "    })\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "def test_num_less(n):\n",
    "    nums = []\n",
    "    for i in n.split():\n",
    "        result = re.findall(r'\\b(0*(?:[1-9][0-9]?|100))\\b',i)\n",
    "        nums.append(result)\n",
    "        all_num=[\",\".join(x) for x in nums if x != []]\n",
    "    return \" \".join(all_num)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Number less than 100:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>address</th>\n",
       "      <th>num_less</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001</td>\n",
       "      <td>72 Surrey Ave.11</td>\n",
       "      <td>72 11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c0002</td>\n",
       "      <td>92 N. Bishop Ave.</td>\n",
       "      <td>92</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c0003</td>\n",
       "      <td>9910 Golden Star St.</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003</td>\n",
       "      <td>102 Dunbar St.</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c0004</td>\n",
       "      <td>17 West Livingston Court</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code                   address num_less\n",
       "0        c0001          72 Surrey Ave.11    72 11\n",
       "1        c0002         92 N. Bishop Ave.       92\n",
       "2        c0003      9910 Golden Star St.         \n",
       "3        c0003            102 Dunbar St.         \n",
       "4        c0004  17 West Livingston Court       17"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "df['num_less'] = df['address'].apply(lambda x : test_num_less(x))\n",
    "print(\"\\nNumber less than 100:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to check whether two given words present in a specified column of a given DataFrame.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>address</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001</td>\n",
       "      <td>9910 Surrey Ave.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c0002</td>\n",
       "      <td>92 N. Bishop Ave.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c0003</td>\n",
       "      <td>9910 Golden Star Ave.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003</td>\n",
       "      <td>102 Dunbar St.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c0004</td>\n",
       "      <td>17 West Livingston Court</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code                   address\n",
       "0        c0001          9910 Surrey Ave.\n",
       "1        c0002         92 N. Bishop Ave.\n",
       "2        c0003     9910 Golden Star Ave.\n",
       "3        c0003            102 Dunbar St.\n",
       "4        c0004  17 West Livingston Court"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.max_columns', 10)\n",
    "df = pd.DataFrame({\n",
    "    'company_code': ['c0001','c0002','c0003', 'c0003', 'c0004'],\n",
    "    'address': ['9910 Surrey Ave.','92 N. Bishop Ave.','9910 Golden Star Ave.', '102 Dunbar St.', '17 West Livingston Court']\n",
    "    })\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Present two words!\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>address</th>\n",
       "      <th>check_two_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c0001</td>\n",
       "      <td>9910 Surrey Ave.</td>\n",
       "      <td>9910 Surrey Ave.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c0002</td>\n",
       "      <td>92 N. Bishop Ave.</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c0003</td>\n",
       "      <td>9910 Golden Star Ave.</td>\n",
       "      <td>9910 Golden Star Ave.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c0003</td>\n",
       "      <td>102 Dunbar St.</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>c0004</td>\n",
       "      <td>17 West Livingston Court</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code                   address        check_two_words\n",
       "0        c0001          9910 Surrey Ave.       9910 Surrey Ave.\n",
       "1        c0002         92 N. Bishop Ave.                       \n",
       "2        c0003     9910 Golden Star Ave.  9910 Golden Star Ave.\n",
       "3        c0003            102 Dunbar St.                       \n",
       "4        c0004  17 West Livingston Court                       "
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def test_and_cond(text):\n",
    "    result = re.findall(r'(?=.*Ave.)(?=.*9910).*', text) \n",
    "    return \" \".join(result)\n",
    "df['check_two_words']=df['address'].apply(lambda x : test_and_cond(x))\n",
    "print(\"\\nPresent two words!\")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to extract date (format: mm-dd-yyyy) from a given column of a given DataFrame.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>zefsalf</td>\n",
       "      <td>05/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>zekfsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale  sale_amount\n",
       "0         Abcd   12/05/2002      12348.5\n",
       "1         EFGF   16/02/1999     233331.2\n",
       "2      zefsalf   05/09/1998         22.5\n",
       "3      sdfslew   12/02/2022    2566552.0\n",
       "4      zekfsdf   15/09/1997         23.0"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'company_code': ['Abcd','EFGF', 'zefsalf', 'sdfslew', 'zekfsdf'],\n",
    "    'date_of_sale': ['12/05/2002','16/02/1999','05/09/1998','12/02/2022','15/09/1997'],\n",
    "    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]\n",
    "})\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_valid_dates(dt):\n",
    "    #format: mm-dd-yyyy\n",
    "    result = re.findall(r'\\b(1[0-2]|0[1-9])/(3[01]|[12][0-9]|0[1-9])/([0-9]{4})\\b',dt)\n",
    "    return result\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Valid dates (format: mm-dd-yyyy):\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>sale_amount</th>\n",
       "      <th>valid_dates</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>12348.5</td>\n",
       "      <td>[(12, 05, 2002)]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>233331.2</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>zefsalf</td>\n",
       "      <td>05/09/1998</td>\n",
       "      <td>22.5</td>\n",
       "      <td>[(05, 09, 1998)]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>2566552.0</td>\n",
       "      <td>[(12, 02, 2022)]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>zekfsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>23.0</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale  sale_amount       valid_dates\n",
       "0         Abcd   12/05/2002      12348.5  [(12, 05, 2002)]\n",
       "1         EFGF   16/02/1999     233331.2                []\n",
       "2      zefsalf   05/09/1998         22.5  [(05, 09, 1998)]\n",
       "3      sdfslew   12/02/2022    2566552.0  [(12, 02, 2022)]\n",
       "4      zekfsdf   15/09/1997         23.0                []"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['valid_dates']=df['date_of_sale'].apply(lambda dt : find_valid_dates(dt))\n",
    "print(\"\\nValid dates (format: mm-dd-yyyy):\")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to extract only words from a given column of a given DataFrame.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>address</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>9910 Surrey Ave.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>92 N. Bishop Ave.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>zefsalf</td>\n",
       "      <td>05/09/1998</td>\n",
       "      <td>9910 Golden Star Ave.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>102 Dunbar St.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>zekfsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>17 West Livingston Court</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale                   address\n",
       "0         Abcd   12/05/2002          9910 Surrey Ave.\n",
       "1         EFGF   16/02/1999         92 N. Bishop Ave.\n",
       "2      zefsalf   05/09/1998     9910 Golden Star Ave.\n",
       "3      sdfslew   12/02/2022            102 Dunbar St.\n",
       "4      zekfsdf   15/09/1997  17 West Livingston Court"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'company_code': ['Abcd','EFGF', 'zefsalf', 'sdfslew', 'zekfsdf'],\n",
    "    'date_of_sale': ['12/05/2002','16/02/1999','05/09/1998','12/02/2022','15/09/1997'],\n",
    "    'address': ['9910 Surrey Ave.','92 N. Bishop Ave.','9910 Golden Star Ave.', '102 Dunbar St.', '17 West Livingston Court']\n",
    "})\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "def search_words(text):\n",
    "    result = re.findall(r'\\b[^\\d\\W]+\\b', text)\n",
    "    return \" \".join(result)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Only words:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>address</th>\n",
       "      <th>only_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>9910 Surrey Ave.</td>\n",
       "      <td>Surrey Ave</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>92 N. Bishop Ave.</td>\n",
       "      <td>N Bishop Ave</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>zefsalf</td>\n",
       "      <td>05/09/1998</td>\n",
       "      <td>9910 Golden Star Ave.</td>\n",
       "      <td>Golden Star Ave</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>102 Dunbar St.</td>\n",
       "      <td>Dunbar St</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>zekfsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>17 West Livingston Court</td>\n",
       "      <td>West Livingston Court</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale                   address             only_words\n",
       "0         Abcd   12/05/2002          9910 Surrey Ave.             Surrey Ave\n",
       "1         EFGF   16/02/1999         92 N. Bishop Ave.           N Bishop Ave\n",
       "2      zefsalf   05/09/1998     9910 Golden Star Ave.        Golden Star Ave\n",
       "3      sdfslew   12/02/2022            102 Dunbar St.              Dunbar St\n",
       "4      zekfsdf   15/09/1997  17 West Livingston Court  West Livingston Court"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['only_words']=df['address'].apply(lambda x : search_words(x))\n",
    "print(\"\\nOnly words:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Write a Pandas program to extract words starting with capital words from a given column of a given DataFrame.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original DataFrame:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>address</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>9910 Surrey Avenue</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>92 N. Bishop Avenue</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>zefsalf</td>\n",
       "      <td>05/09/1998</td>\n",
       "      <td>9910 Golden Star Avenue</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>102 Dunbar St.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>zekfsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>17 West Livingston Court</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale                   address\n",
       "0         Abcd   12/05/2002        9910 Surrey Avenue\n",
       "1         EFGF   16/02/1999       92 N. Bishop Avenue\n",
       "2      zefsalf   05/09/1998   9910 Golden Star Avenue\n",
       "3      sdfslew   12/02/2022            102 Dunbar St.\n",
       "4      zekfsdf   15/09/1997  17 West Livingston Court"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'company_code': ['Abcd','EFGF', 'zefsalf', 'sdfslew', 'zekfsdf'],\n",
    "    'date_of_sale': ['12/05/2002','16/02/1999','05/09/1998','12/02/2022','15/09/1997'],\n",
    "    'address': ['9910 Surrey Avenue','92 N. Bishop Avenue','9910 Golden Star Avenue', '102 Dunbar St.', '17 West Livingston Court']\n",
    "})\n",
    "\n",
    "print(\"Original DataFrame:\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>address</th>\n",
       "      <th>caps_word_in</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>9910 Surrey Avenue</td>\n",
       "      <td>[Surrey, Avenue]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>92 N. Bishop Avenue</td>\n",
       "      <td>[Bishop, Avenue]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>zefsalf</td>\n",
       "      <td>05/09/1998</td>\n",
       "      <td>9910 Golden Star Avenue</td>\n",
       "      <td>[Golden, Star, Avenue]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>102 Dunbar St.</td>\n",
       "      <td>[Dunbar, St]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>zekfsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>17 West Livingston Court</td>\n",
       "      <td>[West, Livingston, Court]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale                   address  \\\n",
       "0         Abcd   12/05/2002        9910 Surrey Avenue   \n",
       "1         EFGF   16/02/1999       92 N. Bishop Avenue   \n",
       "2      zefsalf   05/09/1998   9910 Golden Star Avenue   \n",
       "3      sdfslew   12/02/2022            102 Dunbar St.   \n",
       "4      zekfsdf   15/09/1997  17 West Livingston Court   \n",
       "\n",
       "                caps_word_in  \n",
       "0           [Surrey, Avenue]  \n",
       "1           [Bishop, Avenue]  \n",
       "2     [Golden, Star, Avenue]  \n",
       "3               [Dunbar, St]  \n",
       "4  [West, Livingston, Court]  "
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def find_capital_word(str1):\n",
    "    result = re.findall(r'\\b[A-Z]\\w+', str1)\n",
    "    return result\n",
    "df['caps_word_in']=df['address'].apply(lambda cw : find_capital_word(cw))\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Extract words starting with capital words from the sentences':\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company_code</th>\n",
       "      <th>date_of_sale</th>\n",
       "      <th>address</th>\n",
       "      <th>caps_word_in</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abcd</td>\n",
       "      <td>12/05/2002</td>\n",
       "      <td>9910 Surrey Avenue</td>\n",
       "      <td>[Surrey, Avenue]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EFGF</td>\n",
       "      <td>16/02/1999</td>\n",
       "      <td>92 N. Bishop Avenue</td>\n",
       "      <td>[Bishop, Avenue]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>zefsalf</td>\n",
       "      <td>05/09/1998</td>\n",
       "      <td>9910 Golden Star Avenue</td>\n",
       "      <td>[Golden, Star, Avenue]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sdfslew</td>\n",
       "      <td>12/02/2022</td>\n",
       "      <td>102 Dunbar St.</td>\n",
       "      <td>[Dunbar, St]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>zekfsdf</td>\n",
       "      <td>15/09/1997</td>\n",
       "      <td>17 West Livingston Court</td>\n",
       "      <td>[West, Livingston, Court]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  company_code date_of_sale                   address  \\\n",
       "0         Abcd   12/05/2002        9910 Surrey Avenue   \n",
       "1         EFGF   16/02/1999       92 N. Bishop Avenue   \n",
       "2      zefsalf   05/09/1998   9910 Golden Star Avenue   \n",
       "3      sdfslew   12/02/2022            102 Dunbar St.   \n",
       "4      zekfsdf   15/09/1997  17 West Livingston Court   \n",
       "\n",
       "                caps_word_in  \n",
       "0           [Surrey, Avenue]  \n",
       "1           [Bishop, Avenue]  \n",
       "2     [Golden, Star, Avenue]  \n",
       "3               [Dunbar, St]  \n",
       "4  [West, Livingston, Court]  "
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nExtract words starting with capital words from the sentences':\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
